// Start from an empty session: clears data, stored estimates and programs in memory.
clear all
// User-written commands used below; uncomment to install them once.
//ssc install runby
//ssc install tsspell
//ssc install asdoc
// NOTE(review): esttab, mdesc and how_many_imputations are also called further
// down and do not appear in this list - presumably they need installing as well
// (e.g. ssc install estout / mdesc / how_many_imputations); confirm.

//Program definitions
qui{
//The programs below count how many consecutive years are missing between two observed data points in the income series
//The same program is defined separately for the replication dataset and the extension dataset
program define re_dta
    // Trim the data in memory to the replication sample:
    //   - years 1895 through 2003
    //   - ids up to 12
    //   - only year, cty, id and the re_* variables
    // Rows with a missing year or id are removed too, exactly as the
    // one-sided drops did (Stata ranks missing above every number).
    keep if inrange(year, 1895, 2003) & id <= 12
    keep year cty id re_*
end
program define up_dta
    // Trim the data in memory to the extension sample:
    //   - years 1895 through 2018 (rows with a missing year are removed too)
    //   - only year, cty, id and the up_* variables
    // No restriction is placed on id here.
    keep if inrange(year, 1895, 2018)
    keep year cty id up_*
end

program define countgapA
    // Builds missingA: for every year in which re_ts10 is missing, the number of
    // missing years in the same run; 0 for years in which re_ts10 is observed.
    // Declares year as the time variable with no panel id, so it is written to
    // be run on one country at a time (LJ_inter_re calls it through runby).
    // Leaves dumA, missingA and the variables created by tsspell in the data.
    re_dta
    tsset year
    // flag = 1 on missing years, missing otherwise
    generate dumA = 1 if re_ts10 == .
    // tsspell is user-written; _spell is the spell identifier it creates
    tsspell dumA
    // count the flagged years within each spell; all-missing totals stay missing ...
    bysort _spell: egen missingA = total(dumA), missing
    // ... and are then recoded to 0
    replace missingA = 0 if missingA == .
end

program define countgapB
    // Builds missingB: for every year in which up_ts10 is missing, the number of
    // missing years in the same run; 0 for years in which up_ts10 is observed.
    // Declares year as the time variable with no panel id, so it is written to
    // be run on one country at a time (LJ_inter_up calls it through runby).
    // Leaves dumB, missingB and the variables created by tsspell in the data.
    up_dta
    tsset year
    // flag = 1 on missing years, missing otherwise
    generate dumB = 1 if up_ts10 == .
    // tsspell is user-written; _spell is the spell identifier it creates
    tsspell dumB
    // count the flagged years within each spell; all-missing totals stay missing ...
    bysort _spell: egen missingB = total(dumB), missing
    // ... and are then recoded to 0
    replace missingB = 0 if missingB == .
end
//linear interpolation: top 10% income share -- only when the run of missing years is no longer than 4
//linear interpolation: life expectancy, infant mortality, GDP, education, health expenditure
//The same program is defined separately for the replication dataset and the extension dataset
program define LJ_inter_re
    // Linear interpolation of the replication (re_*) series, country by country.
    //   re_ts10                      : interpolated on year, only where the run of
    //                                  missing years is at most 4 (missingA <= 4)
    //   re_im, re_le, re_gdp         : interpolated on year, all years
    //   re_edu, re_phealth, re_thealth : interpolated and extrapolated (epolate)
    //                                  on year, using years >= 1960 only
    // Each series is overwritten in place with the filled-in version.
    // The by-prefix replaces the former one-line loop over the hard-coded ids
    // 1 to 12, so every id left in the data is processed.
    // NOTE(review): for the 1960+ series the whole column is overwritten, so any
    // value before 1960 presumably ends up missing - this matches the earlier
    // code; confirm it is intended.

    // countgapA restricts the sample (via re_dta) and adds missingA, per country
    runby countgapA, by(cty)

    // a tempvar cannot clash with, or later delete, a user variable
    tempvar filled

    // top income share: short gaps only
    bysort id: ipolate re_ts10 year if missingA <= 4, gen(`filled')
    replace re_ts10 = `filled'
    drop `filled'

    // series interpolated over the full period
    foreach v of varlist re_im re_le re_gdp {
        bysort id: ipolate `v' year, gen(`filled')
        replace `v' = `filled'
        drop `filled'
    }

    // series interpolated and extrapolated from 1960 onwards
    foreach v of varlist re_edu re_phealth re_thealth {
        bysort id: ipolate `v' year if year >= 1960, gen(`filled') epolate
        replace `v' = `filled'
        drop `filled'
    }
end

program define LJ_inter_up
    // Linear interpolation of the extension (up_*) series, country by country.
    //   up_ts10                      : interpolated on year, only where the run of
    //                                  missing years is at most 4 (missingB <= 4)
    //   up_im, up_le, up_gdp         : interpolated on year, all years
    //   up_edu, up_phealth, up_thealth, up_ratio60
    //                                : interpolated and extrapolated (epolate)
    //                                  on year, using years >= 1960 only
    // Each series is overwritten in place with the filled-in version.
    // The by-prefix replaces the former one-line loop over the hard-coded ids
    // 1 to 37, so the program also works unchanged on the 23-country subsample
    // and processes every id left in the data.
    // NOTE(review): for the 1960+ series the whole column is overwritten, so any
    // value before 1960 presumably ends up missing - this matches the earlier
    // code; confirm it is intended.

    // countgapB restricts the sample (via up_dta) and adds missingB, per country
    runby countgapB, by(cty)

    // a tempvar cannot clash with, or later delete, a user variable
    tempvar filled

    // top income share: short gaps only
    bysort id: ipolate up_ts10 year if missingB <= 4, gen(`filled')
    replace up_ts10 = `filled'
    drop `filled'

    // series interpolated over the full period
    foreach v of varlist up_im up_le up_gdp {
        bysort id: ipolate `v' year, gen(`filled')
        replace `v' = `filled'
        drop `filled'
    }

    // series interpolated and extrapolated from 1960 onwards
    foreach v of varlist up_edu up_phealth up_thealth up_ratio60 {
        bysort id: ipolate `v' year if year >= 1960, gen(`filled') epolate
        replace `v' = `filled'
        drop `filled'
    }
end

//this program generates log infant mortality, log health expenditures and squared GDP.
//this program also labels all variables
//The same program is defined separately for the replication dataset and the extension dataset
program define clean_re
    // Derives the regression variables for the replication (re_*) data and
    // labels them. Expects re_im, re_gdp, re_le, re_edu, re_ts10, re_phealth and
    // re_thealth in memory. Creates lnre_im, re_gdpsq, lnre_phealth and
    // lnre_thealth, so it fails if called twice on the same data.
    gen lnre_im = ln(re_im)
    gen re_gdpsq = re_gdp^2

    // re_thealth is overwritten with re_thealth minus re_phealth; the label
    // below calls the result private spending, so on input it presumably
    // holds total health spending.
    replace re_thealth = re_thealth - re_phealth

    // logs of the two health-spending series (foreach replaces the legacy for command)
    foreach v of varlist re_phealth re_thealth {
        gen ln`v' = ln(`v')
    }

    // NOTE(review): two labels contain a dollar sign followed by digits; a dollar
    // sign normally starts a global-macro reference in Stata, so confirm the
    // stored labels still read as intended.
    la var re_gdp "replicated Real GDP per capita ($1000s)"
    la var re_gdpsq "replicated Real GDP per capita squared ($1000s)"
    la var re_le "replicated Life expectancy at birth"
    la var re_edu "replicated Educational attainments for the Population Aged 15-64"
    la var lnre_phealth "replicated Log real public health spending per capita"
    la var lnre_thealth "replicated Log real private health spending per capita"
    la var re_im "replicated Infant mortality rate (per 1000 births)"
    la var lnre_im "replicated Log infant mortality rate per 1000 births"
    la var re_ts10 "replicated Income share of richest 10%"
end

program define clean_up
    // Derives the regression variables for the extension (up_*) data and labels
    // them. Expects up_im, up_gdp, up_le, up_edu, up_ts10, up_ratio60,
    // up_phealth and up_thealth in memory. Creates lnup_im, up_gdpsq,
    // lnup_phealth and lnup_thealth, so it fails if called twice on the same data.
    gen lnup_im = ln(up_im)

    // up_thealth is overwritten with up_thealth minus up_phealth; the label
    // below calls the result private spending, so on input it presumably
    // holds total health spending.
    replace up_thealth = up_thealth - up_phealth

    // logs of the two health-spending series (foreach replaces the legacy for command)
    foreach v of varlist up_phealth up_thealth {
        gen ln`v' = ln(`v')
    }
    gen up_gdpsq = up_gdp^2

    // NOTE(review): two labels contain a dollar sign followed by digits; a dollar
    // sign normally starts a global-macro reference in Stata, so confirm the
    // stored labels still read as intended.
    la var up_gdp "updated Real GDP per capita ($1000s)"
    la var up_gdpsq "updated Real GDP per capita squared ($1000s)"
    la var up_le "updated Life expectancy at birth"
    la var up_edu "updated Educational attainments for the Population Aged 15-64"
    // the next three labels previously had a missing or doubled space after "updated"
    la var lnup_phealth "updated Log real public health spending per capita"
    la var lnup_thealth "updated Log real private health spending per capita"
    la var up_im "updated Infant mortality rate (per 1000 births)"
    la var lnup_im "updated Log infant mortality rate per 1000 births"
    la var up_ts10 "WID Income share of richest 10%"
    la var up_ratio60 "WID proportion of aging population"
end

//Regression starts
// Working directory holding Data_replication; the output tables are written here too.
// NOTE: this is an absolute, machine-specific path - edit it before running elsewhere.
cd "C:\Users\JoscoWu\Desktop\Replication of L&J_paper\Economics review\STATA\revise_data\"
// Memory and matrix-size settings.
// NOTE(review): presumably only needed on older Stata releases - confirm against
// the Stata version actually used.
set mem 5m
set matsize 400
}

//TABLE 1
// L&J interpolation applied to the recompiled (replication) data

use "Data_replication", clear
keep id cty year re_*
drop if year > 2003
// fill gaps the L&J way, then derive logs and squares and attach labels
LJ_inter_re
clean_re
xtset id year

// regressor sets shared by the columns below
local quad re_ts10 re_gdp re_gdpsq
local full `quad' re_edu lnre_phealth lnre_thealth

// replication of L&J T4 Cols (1)-(4), stored as a1-a4:
//   (1) re_le, (2) lnre_im on top share and GDP
//   (3) re_le, (4) lnre_im with GDP squared added
local col 0
foreach rhs in "re_ts10 re_gdp" "`quad'" {
    foreach depvar in re_le lnre_im {
        local ++col
        xtreg `depvar' `rhs' i.year, fe r cl(id)
        est store a`col'
    }
}

// Shorter period: 1960 onwards (the replication data end in 2003)
drop if year < 1960
// Fit the full-control model first so that e(sample) limits Col (5) to the
// observations usable with all controls; Col (6) then reuses the sample of Col (5).
quietly xtreg re_le `full' i.year, fe r cl(id)
//replication of L&J T4 Col.(5)
xtreg re_le `quad' i.year if e(sample), fe r cl(id)
est store a5
//replication of L&J T4 Col.(6)
xtreg lnre_im `quad' i.year if e(sample), fe r cl(id)
est store a6
//replication of L&J T4 Col.(7)
xtreg re_le `full' i.year, fe r cl(id)
est store a7
//replication of L&J T4 Col.(8)
xtreg lnre_im `full' i.year, fe r cl(id)
est store a8

// export the eight columns, hiding the year dummies
esttab a* using table1.rtf, replace drop(*.year) ///
    b(%12.3f) ci(%12.3f) s(N r2 ar2) star(* 0.1 ** 0.05 *** 0.01) noconstant ///
    compress plain num dep br

   
// ===== L&J interpolation using WID data, 37 ctys =====
use "Data_replication", clear
// fill gaps the L&J way, then derive logs and squares and attach labels
LJ_inter_up
clean_up
xtset id year

// regressor sets shared by the columns below
local quad up_ts10 up_gdp up_gdpsq
local full `quad' up_edu lnup_phealth lnup_thealth

// replication of L&J T4 Cols (1)-(4), stored as b1-b4:
//   (1) up_le, (2) lnup_im on top share and GDP
//   (3) up_le, (4) lnup_im with GDP squared added
local col 0
foreach rhs in "up_ts10 up_gdp" "`quad'" {
    foreach depvar in up_le lnup_im {
        local ++col
        xtreg `depvar' `rhs' i.year, fe r cl(id)
        est store b`col'
    }
}

//Shorter period from 1960-2018
drop if year < 1960
// Fit the full-control model first so that e(sample) limits Col (5) to the
// observations usable with all controls; Col (6) then reuses the sample of Col (5).
xtreg up_le `full' i.year, fe r cl(id)
//replication of L&J T4 Col.(5)
xtreg up_le `quad' i.year if e(sample), fe r cl(id)
est store b5
//replication of L&J T4 Col.(6)
xtreg lnup_im `quad' i.year if e(sample), fe r cl(id)
est store b6
//replication of L&J T4 Col.(7)
xtreg up_le `full' i.year, fe r cl(id)
est store b7
//replication of L&J T4 Col.(8)
xtreg lnup_im `full' i.year, fe r cl(id)
est store b8

// export the eight columns, hiding the year dummies
esttab b* using table2A.rtf, replace drop(*.year) ///
    b(%12.3f) ci(%12.3f) s(N r2 ar2) star(* 0.1 ** 0.05 *** 0.01) noconstant ///
    compress plain num dep br

// ===== L&J interpolation using WID data, 23 ctys =====
use "Data_replication", clear
// Remove the 14 countries excluded from the 23-country sample
// (foreach replaces the legacy for command).
foreach c in Russia Slovenia Bulgaria Estonia Latvia Lithuania Slovakia ///
             Chile Greece Hungary Spain Israel Korea Japan {
    drop if cty == "`c'"
}
// fill gaps the L&J way, then derive logs and squares and attach labels
LJ_inter_up
clean_up
xtset id year

// regressor sets shared by the columns below
local quad up_ts10 up_gdp up_gdpsq
local full `quad' up_edu lnup_phealth lnup_thealth

// replication of L&J T4 Cols (1)-(4), stored as c1-c4:
//   (1) up_le, (2) lnup_im on top share and GDP
//   (3) up_le, (4) lnup_im with GDP squared added
local col 0
foreach rhs in "up_ts10 up_gdp" "`quad'" {
    foreach depvar in up_le lnup_im {
        local ++col
        xtreg `depvar' `rhs' i.year, fe r cl(id)
        est store c`col'
    }
}

//Shorter period from 1960-2018
drop if year < 1960
// Fit the full-control model first so that e(sample) limits Col (5) to the
// observations usable with all controls; Col (6) then reuses the sample of Col (5).
xtreg up_le `full' i.year, fe r cl(id)
//replication of L&J T4 Col.(5)
xtreg up_le `quad' i.year if e(sample), fe r cl(id)
est store c5
//replication of L&J T4 Col.(6)
xtreg lnup_im `quad' i.year if e(sample), fe r cl(id)
est store c6
//replication of L&J T4 Col.(7)
xtreg up_le `full' i.year, fe r cl(id)
est store c7
//replication of L&J T4 Col.(8)
xtreg lnup_im `full' i.year, fe r cl(id)
est store c8

// export the eight columns, hiding the year dummies
esttab c* using table2B.rtf, replace drop(*.year) ///
    b(%12.3f) ci(%12.3f) s(N r2 ar2) star(* 0.1 ** 0.05 *** 0.01) noconstant ///
    compress plain num dep br
//Table 4
// L&J recompiled data: complete cases (no interpolation, no imputation)
qui{
    use "Data_replication",clear
    // Restrict to the replication sample (ids up to 12, years up to 2003), as
    // every other re_* analysis in this file does, so that the complete-case
    // columns are estimated on the same panel as the multiple-imputation columns.
    drop if id > 12
    drop if year > 2003
    //derive logs and squares and label variables in the replication dataset
    clean_re
    xtset id year
    //Shorter period: 1960 onwards
    drop if year < 1960
    //replication of L&J T4 Col.(7)
    xtreg re_le re_ts10 re_gdp re_gdpsq re_edu lnre_phealth lnre_thealth i.year, fe r cl(id)
    est store d1
    //replication of L&J T4 Col.(8)
    xtreg lnre_im re_ts10 re_gdp re_gdpsq re_edu lnre_phealth lnre_thealth i.year, fe r cl(id)
    est store d2

    // export both columns, hiding the year dummies
    esttab d* using table4.rtf, replace drop(*.year) ///
        b(%12.3f) ci(%12.3f) s(N r2 ar2) star(* 0.1 ** 0.05 *** 0.01) noconstant ///
        compress plain num dep br
}
   
//Table 4
// Multiple Imputation
//use L&J recompiled data
//impute for Column (7): life expectancy
use "Data_replication",clear
keep id cty year re_*
drop if id > 12
drop if year > 2003
drop if year < 1960
clean_re
// GDP squared is rebuilt as a passive variable after imputation
drop re_gdpsq
// year and country dummies for the imputation model
tab year, gen(yr)
tab id, gen(iid)

//Imputation model specification
local impvars re_ts10 re_le lnre_im re_edu lnre_phealth lnre_thealth
local predictors iid1-iid12 yr1-yr44 re_gdp
mi set flong
mi register imputed `impvars'
mi register regular `predictors'

//pilot imputation (5 data sets) for the life expectancy specification
mi xtset id year
mi impute mvn `impvars' = `predictors', ///
    add(5) prior(jeffreys) initmcmc(em, iter(1000) nolog) ///
    burnin(1000) rseed(123)
mi passive: gen re_gdpsq=re_gdp^2
mi xtset id year
mi estimate: xtreg re_le re_ts10 re_gdp re_gdpsq re_edu lnre_phealth lnre_thealth yr1-yr44, fe cl(id)

//how_many_imputations reports how many more imputations are needed so that the
//MI standard errors would vary by less than 0.05 (cv_se) if imputed again
how_many_imputations, cv_se(.05)
local extra_M = r(add_M)
// Top up only when more imputations are requested. The top-up uses a seed
// different from the pilot: with the same seed and the same observed data the
// chain would be replayed and the added data sets would duplicate the pilot ones.
if `extra_M' > 0 {
    mi impute mvn `impvars' = `predictors', ///
        add(`extra_M') prior(jeffreys) initmcmc(em, iter(1000) nolog) ///
        burnin(1000) rseed(456)
}
mi passive: replace re_gdpsq=re_gdp^2
//replication of L&J T4 Col.(7) using MI
mi estimate: xtreg re_le re_ts10 re_gdp re_gdpsq re_edu lnre_phealth lnre_thealth yr1-yr44, fe cl(id)

//impute for Column (8): log infant mortality
use "Data_replication",clear
drop if id > 12
drop if year > 2003
drop if year < 1960
keep id cty year re_*
clean_re
// GDP squared is rebuilt as a passive variable after imputation
drop re_gdpsq
// year and country dummies for the imputation model
tab year, gen(yr)
tab id, gen(iid)

//Imputation model specification
local impvars re_ts10 re_le lnre_im re_edu lnre_phealth lnre_thealth
local predictors iid1-iid12 yr1-yr44 re_gdp
mi set flong
mi register imputed `impvars'
mi register regular `predictors'

//pilot imputation (5 data sets) for the infant mortality specification
mi xtset id year
mi impute mvn `impvars' = `predictors', ///
    add(5) prior(jeffreys) initmcmc(em, iter(1000) nolog) ///
    burnin(1000) rseed(123)
mi passive: gen re_gdpsq=re_gdp^2
mi xtset id year
mi estimate: xtreg lnre_im re_ts10 re_gdp re_gdpsq re_edu lnre_phealth lnre_thealth yr1-yr44, fe cl(id)

//how_many_imputations reports how many more imputations are needed so that the
//MI standard errors would vary by less than 0.05 (cv_se) if imputed again
how_many_imputations, cv_se(.05)
local extra_M = r(add_M)
// Top up only when more imputations are requested. The top-up uses a seed
// different from the pilot: with the same seed and the same observed data the
// chain would be replayed and the added data sets would duplicate the pilot ones.
if `extra_M' > 0 {
    mi impute mvn `impvars' = `predictors', ///
        add(`extra_M') prior(jeffreys) initmcmc(em, iter(1000) nolog) ///
        burnin(1000) rseed(456)
}
mi passive: replace re_gdpsq=re_gdp^2
//replication of L&J T4 Col.(8) using MI
mi estimate: xtreg lnre_im re_ts10 re_gdp re_gdpsq re_edu lnre_phealth lnre_thealth yr1-yr44, fe cl(id)


//Table 4
// Multiple Imputation
//use good quality extended data (23 countries)
//impute for Column (7): life expectancy
use "Data_replication",clear
keep id cty year up_*
// Remove the 14 countries excluded from the 23-country sample
// (foreach replaces the legacy for command).
foreach c in Russia Slovenia Bulgaria Estonia Latvia Lithuania Slovakia ///
             Chile Greece Hungary Spain Israel Korea Japan {
    drop if cty == "`c'"
}
drop if year > 2018
drop if year < 1960
clean_up
// GDP squared is rebuilt as a passive variable after imputation
drop up_gdpsq
// year and country dummies for the imputation model
tab year, gen(yr)
tab id, gen(iid)

//Imputation model specification (up_gdp is itself imputed here)
local impvars up_ts10 up_le lnup_im up_edu lnup_phealth lnup_thealth up_gdp
local predictors iid1-iid23 yr1-yr59
mi set flong
mi register imputed `impvars'
mi register regular `predictors'

//pilot imputation (5 data sets) for the life expectancy specification
mi xtset id year
mi impute mvn `impvars' = `predictors', ///
    add(5) prior(jeffreys) initmcmc(em, iter(1000) nolog) ///
    burnin(1000) rseed(123)
mi passive: gen up_gdpsq=up_gdp^2
mi xtset id year
mi estimate: xtreg up_le up_ts10 up_gdp up_gdpsq up_edu lnup_phealth lnup_thealth yr1-yr59, fe cl(id)

//how_many_imputations reports how many more imputations are needed so that the
//MI standard errors would vary by less than 0.05 (cv_se) if imputed again
how_many_imputations, cv_se(.05)
local extra_M = r(add_M)
// Top up only when more imputations are requested. The top-up uses a seed
// different from the pilot: with the same seed and the same observed data the
// chain would be replayed and the added data sets would duplicate the pilot ones.
if `extra_M' > 0 {
    mi impute mvn `impvars' = `predictors', ///
        add(`extra_M') prior(jeffreys) initmcmc(em, iter(1000) nolog) ///
        burnin(1000) rseed(456)
}
mi passive: replace up_gdpsq=up_gdp^2
//replication of L&J T4 Col.(7) using MI
mi estimate: xtreg up_le up_ts10 up_gdp up_gdpsq up_edu lnup_phealth lnup_thealth yr1-yr59, fe cl(id)

//impute for Column (8): log infant mortality
use "Data_replication",clear
// Remove the 14 countries excluded from the 23-country sample
// (foreach replaces the legacy for command).
foreach c in Russia Slovenia Bulgaria Estonia Latvia Lithuania Slovakia ///
             Chile Greece Hungary Spain Israel Korea Japan {
    drop if cty == "`c'"
}
drop if year > 2018
drop if year < 1960
keep id cty year up_*
clean_up
// GDP squared is rebuilt as a passive variable after imputation
drop up_gdpsq
// year and country dummies for the imputation model
tab year, gen(yr)
tab id, gen(iid)

//Imputation model specification (up_gdp is itself imputed here)
local impvars up_ts10 up_le lnup_im up_edu lnup_phealth lnup_thealth up_gdp
local predictors iid1-iid23 yr1-yr59
mi set flong
mi register imputed `impvars'
mi register regular `predictors'

//pilot imputation (5 data sets) for the infant mortality specification
mi xtset id year
mi impute mvn `impvars' = `predictors', ///
    add(5) prior(jeffreys) initmcmc(em, iter(1000) nolog) ///
    burnin(1000) rseed(123)
mi passive: gen up_gdpsq=up_gdp^2
mi xtset id year
mi estimate: xtreg lnup_im up_ts10 up_gdp up_gdpsq up_edu lnup_phealth lnup_thealth yr1-yr59, fe cl(id)

//how_many_imputations reports how many more imputations are needed so that the
//MI standard errors would vary by less than 0.05 (cv_se) if imputed again
how_many_imputations, cv_se(.05)
local extra_M = r(add_M)
// Top up only when more imputations are requested. The top-up uses a seed
// different from the pilot: with the same seed and the same observed data the
// chain would be replayed and the added data sets would duplicate the pilot ones.
if `extra_M' > 0 {
    mi impute mvn `impvars' = `predictors', ///
        add(`extra_M') prior(jeffreys) initmcmc(em, iter(1000) nolog) ///
        burnin(1000) rseed(456)
}
mi passive: replace up_gdpsq=up_gdp^2
//replication of L&J T4 Col.(8) using MI
mi estimate: xtreg lnup_im up_ts10 up_gdp up_gdpsq up_edu lnup_phealth lnup_thealth yr1-yr59, fe cl(id)


//Table 4: missingness summary
// Share of missing values per variable in the replication sample, 1960-2003.
use "Data_replication", clear
// clean_re runs first, so re_thealth here is already re_thealth minus re_phealth;
// the log and squared variables it creates are discarded by the keep below.
clean_re
keep if id <= 12 & inrange(year, 1960, 2003)
keep year re_*
mdesc re_ts10 re_le re_im re_gdp re_edu re_phealth re_thealth



//=============================== APPENDIX A ===============================//
// Summary statistics for the replication sample: ids up to 12, years 1895-2003.
use "Data_replication", clear
keep cty id year re_*
// derive the log variables summarised below
clean_re
keep if id <= 12 & inrange(year, 1895, 2003)
summarize re_ts10 re_le lnre_im re_gdp re_edu lnre_phealth lnre_thealth


//=============================== APPENDIX B ===============================//
//MI diagnostics for the imputation model in TABLE 4
//MI with reconstructed L&J's data
use "Data_replication", clear
keep id cty year re_*
keep if id <= 12 & inrange(year, 1960, 2003)
clean_re
drop re_gdpsq
// year and country dummies for the imputation model
tab year, gen(yr)
tab id, gen(iid)

//Imputation model specification
local impvars re_ts10 re_le lnre_im re_edu lnre_phealth lnre_thealth
local predictors iid1-iid12 yr1-yr44 re_gdp
mi set flong
mi register imputed `impvars'
mi register regular `predictors'
mi xtset id year
// mcmconly: run the chain without adding imputations, and save the
// worst linear function (WLF) to the file wlf for the diagnostics below
mi impute mvn `impvars' = `predictors', ///
    mcmconly initmcmc(em, iter(1000) nolog) prior(jeffreys) burnin(1000) burnbetween(100) rseed(123) savewlf(wlf,replace)

// load the saved WLF series and declare the iteration number as time
use wlf, clear
describe
tsset iter
//Diagnostic of stationarity (Fig A.1)
tsline wlf, ytitle(Worst linear function) xtitle(Burn-in period) name(stable,replace)
//Diagnostic of independence (Fig A.2)
ac wlf, title(Worst linear function) ytitle(Autocorrelations) ciopts(astyle(ci)) note("") level(95) name(ac,replace)


//MI with updated data (23 ctys till 2018)

//period 1960-2018
use "Data_replication",clear
// Remove the 14 countries excluded from the 23-country sample
// (foreach replaces the legacy for command).
foreach c in Russia Slovenia Bulgaria Estonia Latvia Lithuania Slovakia ///
             Chile Greece Hungary Spain Israel Korea Japan {
    drop if cty == "`c'"
}
drop if year > 2018
drop if year < 1960
keep id cty year up_*
clean_up
drop up_gdpsq
// year and country dummies for the imputation model
tab year, gen(yr)
tab id, gen(iid)

//Imputation model specification
local impvars up_ts10 up_le lnup_im up_edu lnup_phealth lnup_thealth up_gdp
local predictors iid1-iid23 yr1-yr59
mi set flong
mi register imputed `impvars'
mi register regular `predictors'
mi xtset id year
// mcmconly: run the chain without adding imputations, and save the
// worst linear function (WLF) to the file wlf for the diagnostics below
mi impute mvn `impvars' = `predictors', ///
    mcmconly initmcmc(em, iter(1000) nolog) prior(jeffreys) burnin(1000) burnbetween(100) rseed(123) savewlf(wlf,replace)

// load the saved WLF series and declare the iteration number as time
use wlf, clear
des
tsset iter
// The graphs get their own names so that they do not replace the graphs named
// stable and ac drawn for the replication data (Fig A.1 and A.2).
//Diagnostic of stationarity (Fig A.3)
tsline wlf, ytitle(Worst linear function) xtitle(Burn-in period) name(stable_up,replace)
//Diagnostic of independence (Fig A.4)
ac wlf, title(Worst linear function) ytitle(Autocorrelations) ciopts(astyle(ci)) note("") level(95) name(ac_up,replace)









